WGCNA GO Analysis

sessionInfo() #provides list of loaded packages and version of R. 
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.35     R6_2.5.1          fastmap_1.1.1     xfun_0.43        
##  [5] cachem_1.0.8      knitr_1.45        htmltools_0.5.8.1 rmarkdown_2.26   
##  [9] lifecycle_1.0.4   cli_3.6.2         sass_0.4.9        jquerylib_0.1.4  
## [13] compiler_4.3.2    rstudioapi_0.16.0 tools_4.3.2       evaluate_0.23    
## [17] bslib_0.7.0       yaml_2.3.8        rlang_1.1.3       jsonlite_1.8.8
#BiocManager::install("simplifyEnrichment")

First, load the necessary packages.

library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(forcats)
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(tidyr)
library(grDevices)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
## 
##     mutate
library(tibble)
library(gridExtra)
library(tidyr)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.16.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(circlize)
## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
library(GSEABase)
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: annotate
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plyr':
## 
##     rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:geneLenDataBase':
## 
##     unfactor
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:plyr':
## 
##     desc
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: XML
## Loading required package: graph
## 
## Attaching package: 'graph'
## The following object is masked from 'package:XML':
## 
##     addNode
## The following object is masked from 'package:circlize':
## 
##     degree
## The following object is masked from 'package:plyr':
## 
##     join
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
## The following objects are masked from 'package:zoo':
## 
##     yearmon, yearqtr
## The following objects are masked from 'package:reshape2':
## 
##     dcast, melt
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(stringr)
## 
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
## 
##     boundary
library(GenomicRanges)
## Loading required package: GenomeInfoDb
library(rtracklayer)
library(rrvgo)
library(simplifyEnrichment)
## 
## ========================================
## simplifyEnrichment version 1.10.0
## Bioconductor page: https://bioconductor.org/packages/simplifyEnrichment/
## Github page: https://github.com/jokergoo/simplifyEnrichment
## Documentation: https://jokergoo.github.io/simplifyEnrichment/
## Examples: https://simplifyenrichment.github.io/
## 
## If you use it in published research, please cite:
## Gu, Z. simplifyEnrichment: an R/Bioconductor package for Clustering and 
##   Visualizing Functional Enrichment Results, Genomics, Proteomics & 
##   Bioinformatics 2022.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(simplifyEnrichment))
## ========================================

Load in data

library(rtracklayer)
gff<-rtracklayer::import("../../data/Pocillopora_acuta_HIv2.genes_fixed.gff3")
gff<-as.data.frame(gff) %>% dplyr::select(-Parent)
dim(gff) # 478988     9
## [1] 478988     12
names(gff) 
##  [1] "seqnames"      "start"         "end"           "width"        
##  [5] "strand"        "source"        "type"          "score"        
##  [9] "phase"         "ID"            "transcript_id" "gene_id"
transcripts <- subset(gff, type == "transcript")
transcripts_gr <- makeGRangesFromDataFrame(transcripts, keep.extra.columns=TRUE) #extract length information
transcript_lengths <- width(transcripts_gr) #isolate length of each gene
seqnames<-transcripts_gr$ID #extract list of gene id 
lengths<-cbind(seqnames, transcript_lengths)
lengths<-as.data.frame(lengths) #convert to data frame

dim(transcripts) #33730    13
## [1] 33730    12
kegg <- read.delim("../../data/Pocillopora_acuta_HIv2.genes.KEGG_results.txt",header = FALSE)
kegg <- as.data.frame(kegg)
colnames(kegg)[1] <- "gene_id" 
colnames(kegg)[2] <- "KEGG_new"
head(kegg)
##                                      gene_id KEGG_new
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a         
## 2 Pocillopora_acuta_HIv2___RNAseq.g24143.t1b   K22584
## 3  Pocillopora_acuta_HIv2___RNAseq.g22918.t1         
## 4  Pocillopora_acuta_HIv2___RNAseq.g18333.t1   K03386
## 5   Pocillopora_acuta_HIv2___RNAseq.g7985.t1         
## 6  Pocillopora_acuta_HIv2___RNAseq.g13511.t1
eggnog<-read.delim("../../data/Pocillopora_acuta_HIv2.genes.EggNog_results.txt")#this file contains all of the go terms, descriptions, kegg, etc
eggnog<- plyr::rename(eggnog, c("X.query"="gene_id"))
head(eggnog,2)
##                                      gene_id  seed_ortholog    evalue score
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a 45351.EDO48725 2.16e-120   364
## 2  Pocillopora_acuta_HIv2___RNAseq.g18333.t1 45351.EDO38694 3.18e-123   355
##                                                                           eggNOG_OGs
## 1 COG0620@1|root,KOG2263@2759|Eukaryota,38GZS@33154|Opisthokonta,3BNKS@33208|Metazoa
## 2 COG0450@1|root,KOG0852@2759|Eukaryota,38B9P@33154|Opisthokonta,3BGS4@33208|Metazoa
##   max_annot_lvl COG_category
## 1 33208|Metazoa            E
## 2 33208|Metazoa            O
##                                           Description Preferred_name
## 1    Cobalamin-independent synthase, Catalytic domain              -
## 2 negative regulation of male germ cell proliferation          PRDX4
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         GOs
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         -
## 2 GO:0000003,GO:0001775,GO:0002252,GO:0002263,GO:0002274,GO:0002275,GO:0002283,GO:0002366,GO:0002376,GO:0002443,GO:0002444,GO:0002446,GO:0003006,GO:0003674,GO:0003824,GO:0004601,GO:0005488,GO:0005515,GO:0005575,GO:0005576,GO:0005615,GO:0005622,GO:0005623,GO:0005737,GO:0005783,GO:0005790,GO:0005829,GO:0006082,GO:0006457,GO:0006464,GO:0006468,GO:0006520,GO:0006575,GO:0006793,GO:0006796,GO:0006807,GO:0006810,GO:0006887,GO:0006915,GO:0006950,GO:0006952,GO:0006955,GO:0006979,GO:0007154,GO:0007165,GO:0007249,GO:0007252,GO:0007275,GO:0007276,GO:0007283,GO:0007548,GO:0008150,GO:0008152,GO:0008219,GO:0008285,GO:0008379,GO:0008406,GO:0008584,GO:0009056,GO:0009266,GO:0009409,GO:0009605,GO:0009607,GO:0009617,GO:0009628,GO:0009636,GO:0009893,GO:0009966,GO:0009967,GO:0009987,GO:0010467,GO:0010604,GO:0010646,GO:0010647,GO:0010941,GO:0010942,GO:0010950,GO:0010952,GO:0012501,GO:0012505,GO:0016043,GO:0016192,GO:0016209,GO:0016310,GO:0016491,GO:0016684,GO:0016999,GO:0017001,GO:0017144,GO:0019222,GO:0019471,GO:0019538,GO:0019725,GO:0019752,GO:0019953,GO:0022414,GO:0022417,GO:0023051,GO:0023052,GO:0023056,GO:0030141,GO:0030162,GO:0030198,GO:0031323,GO:0031325,GO:0031410,GO:0031974,GO:0031982,GO:0031983,GO:0032268,GO:0032270,GO:0032501,GO:0032502,GO:0032504,GO:0032940,GO:0033554,GO:0034774,GO:0035556,GO:0036211,GO:0036230,GO:0042119,GO:0042127,GO:0042221,GO:0042592,GO:0042737,GO:0042742,GO:0042743,GO:0042744,GO:0042802,GO:0042803,GO:0042981,GO:0043062,GO:0043065,GO:0043067,GO:0043068,GO:0043085,GO:0043170,GO:0043207,GO:0043226,GO:0043227,GO:0043229,GO:0043231,GO:0043233,GO:0043280,GO:0043281,GO:0043299,GO:0043312,GO:0043412,GO:0043436,GO:0043900,GO:0043901,GO:0044093,GO:0044237,GO:0044238,GO:0044248,GO:0044260,GO:0044267,GO:0044281,GO:0044421,GO:0044422,GO:0044424,GO:0044433,GO:0044444,GO:0044446,GO:0044464,GO:0044703,GO:0045055,GO:0045137,GO:0045321,GO:0045454,GO:0045862,GO:0046425,GO:0046427,GO:0046546,GO:0046661,GO:0046903,GO:0046983,GO:0048232,GO:0048513,GO:0048518,GO:0048519,GO:0048522,GO:0048523,GO:0048583,GO:0048584,GO:0048608,GO:0048609,GO:0048731,GO:0048856,GO:0050789,GO:0050790,GO:0050794,GO:0050896,GO:0051171,GO:0051173,GO:0051179,GO:0051186,GO:0051187,GO:0051234,GO:0051239,GO:0051241,GO:0051246,GO:0051247,GO:0051336,GO:0051345,GO:0051604,GO:0051704,GO:0051707,GO:0051716,GO:0051920,GO:0052547,GO:0052548,GO:0055114,GO:0060205,GO:0060255,GO:0061458,GO:0065007,GO:0065008,GO:0065009,GO:0070013,GO:0070417,GO:0070887,GO:0071704,GO:0071840,GO:0072593,GO:0080090,GO:0097190,GO:0097237,GO:0097708,GO:0098542,GO:0098754,GO:0098869,GO:0099503,GO:0101002,GO:1901564,GO:1901605,GO:1902531,GO:1902533,GO:1904813,GO:1904892,GO:1904894,GO:1905936,GO:1905937,GO:1990748,GO:2000116,GO:2000241,GO:2000242,GO:2000254,GO:2000255,GO:2001056,GO:2001233,GO:2001235,GO:2001267,GO:2001269
##          EC   KEGG_ko
## 1  2.1.1.14 ko:K00549
## 2 1.11.1.15 ko:K03386
##                                                                           KEGG_Pathway
## 1 ko00270,ko00450,ko01100,ko01110,ko01230,map00270,map00450,map01100,map01110,map01230
## 2                                                                     ko04214,map04214
##   KEGG_Module KEGG_Reaction             KEGG_rclass
## 1      M00017 R04405,R09365 RC00035,RC00113,RC01241
## 2           -             -                       -
##                             BRITE KEGG_TC CAZy BiGG_Reaction
## 1 ko00000,ko00001,ko00002,ko01000       -    -             -
## 2 ko00000,ko00001,ko01000,ko04147       -    -             -
##                 PFAMs
## 1         Meth_synt_2
## 2 1-cysPrx_C,AhpC-TSA
gogene <- merge(transcripts, eggnog, by=c("gene_id"), all=T)
gogene <- merge(gogene, kegg, by=c("gene_id"), all=T)
head(gogene,2)
##                                   gene_id                           seqnames
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t Pocillopora_acuta_HIv2___Sc0000013
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t Pocillopora_acuta_HIv2___Sc0000013
##     start     end width strand   source       type score.x phase
## 1 4542087 4551503  9417      + AUGUSTUS transcript      NA    NA
## 2 4639103 4647350  8248      + AUGUSTUS transcript      NA    NA
##                                        ID
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t
##                             transcript_id       seed_ortholog   evalue score.y
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t      45351.EDO27354 2.41e-93     317
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t 6087.XP_002166004.2 1.28e-38     164
##                                                                           eggNOG_OGs
## 1 COG0666@1|root,KOG0510@2759|Eukaryota,38G7Q@33154|Opisthokonta,3BCDU@33208|Metazoa
## 2                                              COG0666@1|root,KOG4177@2759|Eukaryota
##    max_annot_lvl COG_category                                Description
## 1  33208|Metazoa           DZ osmolarity-sensing cation channel activity
## 2 2759|Eukaryota            I                           spectrin binding
##   Preferred_name
## 1          TRPA1
## 2              -
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         GOs
## 1 GO:0000302,GO:0001580,GO:0002791,GO:0002793,GO:0003008,GO:0003012,GO:0003674,GO:0004888,GO:0005034,GO:0005215,GO:0005216,GO:0005217,GO:0005244,GO:0005245,GO:0005261,GO:0005262,GO:0005488,GO:0005515,GO:0005575,GO:0005623,GO:0005886,GO:0005887,GO:0006810,GO:0006811,GO:0006812,GO:0006816,GO:0006873,GO:0006874,GO:0006875,GO:0006936,GO:0006939,GO:0006950,GO:0006979,GO:0007154,GO:0007165,GO:0007166,GO:0007204,GO:0007600,GO:0007602,GO:0007606,GO:0007610,GO:0007638,GO:0008150,GO:0008324,GO:0009266,GO:0009314,GO:0009408,GO:0009409,GO:0009410,GO:0009416,GO:0009453,GO:0009581,GO:0009582,GO:0009583,GO:0009593,GO:0009605,GO:0009612,GO:0009628,GO:0009636,GO:0009719,GO:0009966,GO:0009967,GO:0009987,GO:0010033,GO:0010035,GO:0010037,GO:0010243,GO:0010378,GO:0010646,GO:0010647,GO:0010817,GO:0014070,GO:0014074,GO:0014832,GO:0014848,GO:0015075,GO:0015085,GO:0015267,GO:0015276,GO:0015278,GO:0015318,GO:0016020,GO:0016021,GO:0016043,GO:0016048,GO:0016324,GO:0019233,GO:0019722,GO:0019725,GO:0019932,GO:0022607,GO:0022803,GO:0022832,GO:0022834,GO:0022836,GO:0022838,GO:0022839,GO:0022843,GO:0022857,GO:0022890,GO:0023041,GO:0023051,GO:0023052,GO:0023056,GO:0030001,GO:0030003,GO:0030424,GO:0031000,GO:0031224,GO:0031226,GO:0031644,GO:0031646,GO:0032024,GO:0032421,GO:0032501,GO:0032879,GO:0032880,GO:0032991,GO:0033554,GO:0033555,GO:0034220,GO:0034605,GO:0034702,GO:0034703,GO:0035556,GO:0035690,GO:0035774,GO:0036270,GO:0038023,GO:0040011,GO:0040040,GO:0042221,GO:0042330,GO:0042331,GO:0042391,GO:0042493,GO:0042542,GO:0042592,GO:0042752,GO:0042802,GO:0042995,GO:0043005,GO:0043052,GO:0043269,GO:0043270,GO:0043279,GO:0043933,GO:0044057,GO:0044070,GO:0044085,GO:0044425,GO:0044459,GO:0044464,GO:0045177,GO:0046677,GO:0046873,GO:0046883,GO:0046887,GO:0046957,GO:0048265,GO:0048518,GO:0048519,GO:0048522,GO:0048523,GO:0048583,GO:0048584,GO:0048878,GO:0050708,GO:0050714,GO:0050789,GO:0050794,GO:0050796,GO:0050801,GO:0050848,GO:0050850,GO:0050877,GO:0050896,GO:0050906,GO:0050907,GO:0050909,GO:0050912,GO:0050913,GO:0050951,GO:0050954,GO:0050955,GO:0050960,GO:0050961,GO:0050965,GO:0050966,GO:0050968,GO:0050974,GO:0050982,GO:0051046,GO:0051047,GO:0051049,GO:0051050,GO:0051179,GO:0051209,GO:0051222,GO:0051223,GO:0051234,GO:0051239,GO:0051240,GO:0051259,GO:0051260,GO:0051262,GO:0051282,GO:0051283,GO:0051289,GO:0051480,GO:0051606,GO:0051641,GO:0051649,GO:0051716,GO:0051930,GO:0051931,GO:0051969,GO:0052129,GO:0055065,GO:0055074,GO:0055080,GO:0055082,GO:0055085,GO:0060089,GO:0060341,GO:0060401,GO:0060402,GO:0061178,GO:0065003,GO:0065007,GO:0065008,GO:0070201,GO:0070417,GO:0070588,GO:0070838,GO:0070887,GO:0071241,GO:0071244,GO:0071310,GO:0071312,GO:0071313,GO:0071407,GO:0071415,GO:0071417,GO:0071466,GO:0071495,GO:0071840,GO:0071944,GO:0072347,GO:0072503,GO:0072507,GO:0072511,GO:0090087,GO:0090276,GO:0090277,GO:0097458,GO:0097553,GO:0097603,GO:0097604,GO:0098590,GO:0098655,GO:0098660,GO:0098662,GO:0098771,GO:0098796,GO:0098862,GO:0098900,GO:0098908,GO:0099094,GO:0099604,GO:0120025,GO:1901698,GO:1901699,GO:1901700,GO:1901701,GO:1902495,GO:1902531,GO:1902533,GO:1903522,GO:1903530,GO:1903532,GO:1903793,GO:1904058,GO:1904951,GO:1990351,GO:1990760
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         -
##   EC   KEGG_ko     KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass
## 1  - ko:K04984 ko04750,map04750           -             -           -
## 2  -         -                -           -             -           -
##                     BRITE                                 KEGG_TC CAZy
## 1 ko00000,ko00001,ko04040 1.A.4.6.1,1.A.4.6.2,1.A.4.6.3,1.A.4.6.5    -
## 2                       -                                       -    -
##   BiGG_Reaction                           PFAMs KEGG_new
## 1             - Ank,Ank_2,Ank_3,Ank_4,Ion_trans   K04984
## 2             - Ank,Ank_2,Ank_4,Ank_5,Ion_trans   K04984
dim(gogene)
## [1] 33730    33
geneInfo <- read.csv("../../output/WGCNA/WGCNA_ModuleMembership.csv") #this file was generated from the WGCNA analyses and contains the modules of interest
geneInfo<- plyr::rename(geneInfo, c("X"="gene_id"))
dim(geneInfo) # there are 9012 genes in our gene info file
## [1] 9012   40
geneInfo <- merge(gogene, geneInfo, by=c("gene_id")) #merging the GO and Kegg info to module membership for the 9012 genes

Format GO terms to remove dashes and quotes and separate by semicolons (replace , with ;) in GOs column

geneInfo$GOs <- gsub(",", ";", geneInfo$GOs)
geneInfo$GOs <- gsub('"', "", geneInfo$GOs)
geneInfo$GOs <- gsub("-", NA, geneInfo$GOs)

geneInfo$KEGG_new[geneInfo$KEGG_new == ""] <- NA
unique(geneInfo$moduleColor)
##  [1] "green"        "blue"         "salmon"       "turquoise"    "yellow"      
##  [6] "black"        "red"          "magenta"      "lightcyan"    "purple"      
## [11] "brown"        "pink"         "midnightblue" "tan"          "cyan"
geneInfo$Length<-lengths$transcript_lengths[match(geneInfo$gene_id, lengths$seqnames)]
dim(geneInfo)
## [1] 9012   73
write.csv(geneInfo, file = "../../output/WGCNA/GO_analysis/geneInfo_WGCNA.csv") #gene info for reference/supplement

Run the GOSeq function by module color to test for GO term enrichment. Due to high number of enriched GO terms, I am using a p-value threshold of <0.05 and using rrvgo package to reduce redundancy in list of GO terms.

Calcification - Up

### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes 
LENGTH.vector <- as.integer(geneInfo$Length)
calc_up_mods <- c("brown", "red", "black", "pink", "salmon", "blue")

nrow(geneInfo %>% dplyr::filter(moduleColor=="brown")) #942
## [1] 942
nrow(geneInfo %>% dplyr::filter(moduleColor=="red")) #425
## [1] 425
nrow(geneInfo %>% filter(moduleColor=="black")) #396
## [1] 396
nrow(geneInfo %>% filter(moduleColor=="pink")) #220
## [1] 220
nrow(geneInfo %>% filter(moduleColor=="salmon")) #154
## [1] 154
nrow(geneInfo %>% filter(moduleColor=="blue")) #1989
## [1] 1989
sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="brown")), nrow(geneInfo %>% dplyr::filter(moduleColor=="red")), nrow(geneInfo %>% filter(moduleColor=="black")), nrow(geneInfo %>% filter(moduleColor=="pink")), nrow(geneInfo %>% filter(moduleColor=="salmon")), nrow(geneInfo %>% filter(moduleColor=="blue")))
## [1] 4126
# 4126

calc_down_mods <- c("turquoise","magenta","lightcyan")

nrow(geneInfo %>% dplyr::filter(moduleColor=="turquoise")) #2558
## [1] 2558
nrow(geneInfo %>% dplyr::filter(moduleColor=="magenta")) #219
## [1] 219
nrow(geneInfo %>% filter(moduleColor=="lightcyan")) #65
## [1] 65
sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="turquoise")), nrow(geneInfo %>% dplyr::filter(moduleColor=="magenta")), nrow(geneInfo %>% filter(moduleColor=="lightcyan")))
## [1] 2842
# 2842

other_mods <- c("green","yellow", "purple", "midnightblue","cyan","tan")

sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="green")), nrow(geneInfo %>% dplyr::filter(moduleColor=="yellow")), nrow(geneInfo %>% filter(moduleColor=="purple")), nrow(geneInfo %>% filter(moduleColor=="midnightblue")), nrow(geneInfo %>% filter(moduleColor=="cyan")),nrow(geneInfo %>% filter(moduleColor=="tan")))
## [1] 2044
# 2044

# 4126 + 2842 + 2044 = 9012, which represents all of our genes

4126 genes are in the 6 modules significantly upregulated by calcification.

### Generate vector with names in just the module we are analyzing
# ID.vector <- geneInfo %>%
#   filter(moduleColor==c("brown", "red", "black", "pink", "salmon", "green")) %>%
#   #get_rows(.data[[module]]))%>%
#   pull(gene_id)

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("brown", "red", "black", "pink", "salmon", "blue")) %>%
  pull(gene_id)

length(ID.vector) #4126
## [1] 4126
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 
## Warning in pcls(G): initial point very close to some inequality constraints

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05 <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue)
#Write file of results 
write.csv(GO_05, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv")
GO_05 <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv")
go_results_BP <- GO_05 %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 654   8
head(go_results_BP)
##   X     GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0050794            8.436918e-08                1.0000000       1395
## 2 2 GO:0065007            1.553493e-06                0.9999989       1574
## 3 3 GO:0050789            1.584882e-06                0.9999989       1479
## 4 4 GO:0048523            1.581326e-05                0.9999878        800
## 5 5 GO:0048518            3.043369e-05                0.9999758        950
## 6 6 GO:0051336            3.403563e-05                0.9999783        232
##   numInCat                                      term ontology
## 1     2785            regulation of cellular process       BP
## 2     3184                     biological regulation       BP
## 3     2981          regulation of biological process       BP
## 4     1569   negative regulation of cellular process       BP
## 5     1888 positive regulation of biological process       BP
## 6      415          regulation of hydrolase activity       BP

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")

#compare_clustering_methods(Mat, plot_type = "heatmap")

pdf(file = "../../output/WGCNA/GO_analysis/calc_up_mods_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), max_words=20)
## Cluster 654 terms by 'binary_cut'... 184 clusters, used 2.05056 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 6 GO lists...
dev.off()
## quartz_off_screen 
##                 2
CalcUpMods_GO_P05 <- go_results_BP %>% 
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=1, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Biological Process GO Terms Enriched in Modules Upregulated in Calcification") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 3,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
                    axis.text.y = element_text(size = 2), #set y axis labels size
        plot.background=element_blank(),#Set the plot background
        legend.position="none")

CalcUpMods_GO_P05

ggsave("../../output/WGCNA/GO_analysis/CalcUpMods_GO_P05.pdf", CalcUpMods_GO_P05, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package 
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont="BP",
                                method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
 #calculate similarity 
scores <- setNames(-log(go_results_BP$over_represented_pvalue), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
                                scores,
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 508  10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_reduced$ParentTerm))
## [1] 73

The reduced list of terms is 508 terms that falls under 73 parent terms.

write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")

Plot reduced terms

#plot significantly enriched GO terms by Slim Category faceted by slim term 
 GO.plot_calcification <-  ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) + 
    geom_point(aes(size=over_represented_pvalue)) + 
    scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
    facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
    theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    strip.text.y = element_text(angle=0, size = 10),
    strip.text.x = element_text(size = 20),
    axis.text = element_text(size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())
GO.plot_calcification

ggsave(filename="../../output/WGCNA/GO_analysis/GO.plot_calcification.pdf", plot=GO.plot_calcification, dpi=300, height=100, units="in", limitsize=FALSE)
## Saving 7 x 100 in image
CalcUpMods_GO_P05_reduced <- go_results_BP_reduced %>% 
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=1, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Biological Process GO Terms Enriched in Modules Upregulated in Calcification") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 3,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
                    axis.text.y = element_text(size = 2), #set y axis labels size
        plot.background=element_blank(),#Set the plot background
        legend.position="none")

CalcUpMods_GO_P05_reduced

ggsave("../../output/WGCNA/GO_analysis/CalcUpMods_GO_P05_reduced.pdf", CalcUpMods_GO_P05_reduced, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)

Calcification - Down

2842 genes are in the 4 modules significantly downregulated by calcification.

### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes 
LENGTH.vector <- as.integer(geneInfo$Length)
### Generate vector with names in just the module we are analyzing
ID.vector <- geneInfo%>%
  filter(moduleColor %in% c("turquoise","magenta","lightcyan"))%>%
  pull(gene_id)

length(ID.vector) #2842
## [1] 2842
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05 <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue)
#Write file of results 
write.csv(GO_05, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv")
GO_05 <-read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv")
go_results_BP <- GO_05 %>%
      filter(ontology=="BP") %>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat > 10) %>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 298   8
head(go_results_BP)
##   X     GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0002181            8.295294e-15                        1         67
## 2 2 GO:0006614            2.463649e-13                        1         48
## 3 3 GO:0006412            3.096109e-13                        1        123
## 4 4 GO:0043043            1.598313e-12                        1        126
## 5 5 GO:0006613            2.273026e-12                        1         49
## 6 6 GO:0006518            7.543937e-12                        1        146
##   numInCat                                                        term ontology
## 1       90                                     cytoplasmic translation       BP
## 2       59 SRP-dependent cotranslational protein targeting to membrane       BP
## 3      219                                                 translation       BP
## 4      231                                peptide biosynthetic process       BP
## 5       63               cotranslational protein targeting to membrane       BP
## 6      285                                   peptide metabolic process       BP

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/calc_down_mods_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), max_words=20)
## Cluster 298 terms by 'binary_cut'... 87 clusters, used 0.521641 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 4 GO lists...
dev.off()
## quartz_off_screen 
##                 2
CalcDownMods_GO_P05 <- go_results_BP %>% 
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=1, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Biological Process GO Terms Enriched in Modules Downregulated in Calcification") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 3,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
                    axis.text.y = element_text(size = 2), #set y axis labels size
        plot.background=element_blank(),#Set the plot background
        legend.position="none")

CalcDownMods_GO_P05

ggsave("../../output/WGCNA/GO_analysis/CalcDownMods_GO_P05.pdf", CalcDownMods_GO_P05, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package 
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont="BP",
                                method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
 #calculate similarity 
scores <- setNames(-log(go_results_BP$over_represented_pvalue), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
                                scores,
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 226  10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_reduced$ParentTerm))
## [1] 38

The reduced list of terms is 226 terms that falls under 38 parent terms.

write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
#plot significantly enriched GO terms by Slim Category faceted by slim term 
 GO.plot_calcification_down <-  ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) + 
    geom_point(aes(size=over_represented_pvalue)) + 
    scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
    facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
    theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    strip.text.y = element_text(angle=0, size = 10),
    strip.text.x = element_text(size = 20),
    axis.text = element_text(size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())
GO.plot_calcification_down

ggsave(filename="../../output/WGCNA/GO_analysis/GO.plot_calcification_down.pdf", plot=GO.plot_calcification_down, dpi=300, height=100, units="in", limitsize=FALSE)
## Saving 7 x 100 in image
CalcDownMods_GO_P05_reduced <- go_results_BP_reduced %>% 
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=1, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Biological Process GO Terms Enriched in Modules Downregulated in Calcification") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 3,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
                    axis.text.y = element_text(size = 2), #set y axis labels size
        plot.background=element_blank(),#Set the plot background
        legend.position="none")

CalcDownMods_GO_P05_reduced

ggsave("../../output/WGCNA/GO_analysis/CalcDownMods_GO_P05_reduced.pdf", CalcDownMods_GO_P05_reduced, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)

code for by module

library(rrvgo)

# Define the unique module colors
module_colors <- na.omit(unique(geneInfo$moduleColor))

# Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)

# Generate length vector for all genes 
LENGTH.vector <- as.integer(geneInfo$Length)

 
# Loop over each unique module color
for (color in module_colors) {
 # Filter geneInfo based on the current color
  color_filtered <- geneInfo %>% filter(moduleColor == color)

  # Generate vector with names in just the module we are analyzing
  ID.vector <- color_filtered$gene_id
  
  length(ID.vector)

  # Get a list of GO Terms for each module
  GO.terms <- color_filtered %>%
    dplyr::select(GOs, gene_id) %>%
    rename(GOs = "GO.terms")
  
  dim(GO.terms)

  ## Format to have one GO term per row with gene ID repeated
  split <- strsplit(as.character(GO.terms$GO.terms), ";") 
  split2 <- data.frame(v1 = rep.int(GO.terms$gene_id, sapply(split, length)), v2 = unlist(split)) 
  colnames(split2) <- c("gene", "GO.terms")
  GO.terms <- split2

  ## Construct list of genes with 1 for genes in module and 0 for genes not in the module
  gene.vector <- as.integer(ALL.vector %in% ID.vector) 
  names(gene.vector) <- ALL.vector # set names
  # Weight gene vector by bias for length of gene 
  pwf <- nullp(gene.vector, ID.vector, bias.data = LENGTH.vector) 

  # Run goseq using Wallenius method for all categories of GO terms 
  GO.wall <- goseq(pwf, ID.vector, gene2cat = GO.terms, test.cats = c("GO:BP", "GO:MF", "GO:CC"), method = "Wallenius", use_genes_without_cat = TRUE)
  GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
  colnames(GO)[1] <- "GOterm"

  # Adjust p-values 
  GO$bh_adjust <- p.adjust(GO$over_represented_pvalue, method = "BH") 

  # Filtering for p < 0.01
  GO <- GO %>%
    dplyr::filter(bh_adjust < 0.00001) %>%
    dplyr::arrange(., ontology, bh_adjust)
   
  # Write file of results 
  write.csv(GO, file = paste0("../../output/WGCNA/GO_analysis/goseq_pattern_", color, ".csv"))

  go_results <- GO

  go_results<-go_results%>%
      filter(ontology=="BP")%>%
      filter(bh_adjust != "NA") %>%
      #filter(numInCat>100)%>%
      arrange(., bh_adjust)
  
  #Reduce/collapse GO term set with the rrvgo package 
  simMatrix <- calculateSimMatrix(go_results$GOterm,
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont="BP",
                                method="Rel")
  
   #calculate similarity 
  scores <- setNames(-log(go_results$bh_adjust), go_results$GOterm)
  reducedTerms <- reduceSimMatrix(simMatrix,
                                scores,
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
  
  #keep only the goterms from the reduced list
  go_results <- go_results %>% filter(GOterm %in% reducedTerms$go)
  
  #add in parent terms to list of go terms 
  go_results$ParentTerm<-reducedTerms$parentTerm[match(go_results$GOterm, reducedTerms$go)]
  
  #plot significantly enriched GO terms by Slim Category faceted by slim term 
 GO.plot <-  ggplot(go_results, aes(x = ontology, y = term)) + 
    geom_point(aes(size=bh_adjust)) + 
    scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
    facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
    theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    strip.text.y = element_text(angle=0, size = 10),
    strip.text.x = element_text(size = 20),
    axis.text = element_text(size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())

 GO.plot
 
  length(colnames(go_results)[go_results$ParentTerm=="cation transport"])
  length(colnames(go_results)[go_results$ParentTerm=="inorganic ion homeostasis"])
  length(colnames(go_results)[go_results$ParentTerm=="regulation of cellular response to stress"])
}

Merge the frequency of GOterms for both up- and down-regulation of calcification

Reduce the lists together

GO_05_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv") %>% dplyr::select(-"X")
GO_05_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv") %>% dplyr::select(-"X")
go_results_BP_up <- GO_05_up %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)
dim(go_results_BP_up)
## [1] 654   7
go_results_BP_down <- GO_05_down %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)
dim(go_results_BP_down)
## [1] 298   7
all_enriched_terms_up_down <- c(go_results_BP_up$GOterm,go_results_BP_down$GOterm)
length(all_enriched_terms_up_down)
## [1] 952
length(unique(all_enriched_terms_up_down))
## [1] 947

By reducing the lists together, we can ensure that each GO term is given the same parent term whether the term was in the Up or Down modules.

library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package 
simMatrix <- calculateSimMatrix(unique(all_enriched_terms_up_down),
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont="BP",
                                method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity 
reducedTerms <- reduceSimMatrix(simMatrix,
                                scores = "size",
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
## No scores provided. Falling back to term's GO size
dim(reducedTerms)
## [1] 730  10
#keep only the goterms from the reduced list
go_results_BP_up_reduced <- go_results_BP_up %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_up_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_up_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_up_reduced$ParentTerm))
## [1] 78
#keep only the goterms from the reduced list
go_results_BP_down_reduced <- go_results_BP_down %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_down_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_down_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_down_reduced$ParentTerm))
## [1] 43
#write.csv(go_results_BP_up_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
#write.csv(go_results_BP_down_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were upregulated for calcification. The list has been further reduced by using the package rrvgo.

#cal_up_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
cal_up_terms <- go_results_BP_up_reduced
cal_up_terms <- cal_up_terms %>% mutate(Factor = "Up") 
head(cal_up_terms)
##       GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0050794            8.436918e-08                1.0000000       1395
## 2 GO:0065007            1.553493e-06                0.9999989       1574
## 3 GO:0050789            1.584882e-06                0.9999989       1479
## 4 GO:0048523            1.581326e-05                0.9999878        800
## 5 GO:0048518            3.043369e-05                0.9999758        950
## 6 GO:0051336            3.403563e-05                0.9999783        232
##   numInCat                                      term ontology
## 1     2785            regulation of cellular process       BP
## 2     3184                     biological regulation       BP
## 3     2981          regulation of biological process       BP
## 4     1569   negative regulation of cellular process       BP
## 5     1888 positive regulation of biological process       BP
## 6      415          regulation of hydrolase activity       BP
##                                      ParentTerm Factor
## 1 regulation of macromolecule metabolic process     Up
## 2 regulation of macromolecule metabolic process     Up
## 3 regulation of macromolecule metabolic process     Up
## 4   negative regulation of biosynthetic process     Up
## 5 regulation of macromolecule metabolic process     Up
## 6              regulation of catalytic activity     Up

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were downregulated for calcification. The list has been further reduced by using the package rrvgo.

#cal_down_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
cal_down_terms <- go_results_BP_down_reduced
cal_down_terms <- cal_down_terms %>% mutate(Factor = "Down")
head(cal_down_terms)
##       GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0002181            8.295294e-15                        1         67
## 2 GO:0006614            2.463649e-13                        1         48
## 3 GO:0006412            3.096109e-13                        1        123
## 4 GO:0043043            1.598313e-12                        1        126
## 5 GO:0006613            2.273026e-12                        1         49
## 6 GO:0006518            7.543937e-12                        1        146
##   numInCat                                                        term ontology
## 1       90                                     cytoplasmic translation       BP
## 2       59 SRP-dependent cotranslational protein targeting to membrane       BP
## 3      219                                                 translation       BP
## 4      231                                peptide biosynthetic process       BP
## 5       63               cotranslational protein targeting to membrane       BP
## 6      285                                   peptide metabolic process       BP
##          ParentTerm Factor
## 1       translation   Down
## 2 protein transport   Down
## 3       translation   Down
## 4       translation   Down
## 5 protein transport   Down
## 6       translation   Down

Merge up and down-regulation of calcification GOterms

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were upregulated or downregulated for calcification. The list has been further reduced by using the package rrvgo.

all_terms <- rbind(cal_down_terms,cal_up_terms)

all_terms <-  all_terms[,c("Factor","GOterm","over_represented_pvalue","under_represented_pvalue","numDEInCat","numInCat","term","ontology","ParentTerm")]

all_terms$GOterm<-as.factor(all_terms$GOterm)

dim(all_terms) #734 reduced terms 
## [1] 734   9
length(unique(all_terms$GOterm)) #this represents 730 unique terms between the two lists of reduced terms
## [1] 730
length(unique(all_terms$ParentTerm)) #this represents 87 unique terms between the two lists of reduced terms
## [1] 87

Unique terms

This is collapsing the list from 734 rows to 730, representing the 730 unique GO terms in the list above. It collapses the information in the columns “ParentTerm” and “Factor” so that for each GO term we get the parent terms associated and if this was enriched in modules upregulated for calcification, downregulated for calcification, or both.

goterms_shared <- all_terms %>%
  group_by(GOterm) %>%
  dplyr::summarise(
    ParentTerm = paste(unique(ParentTerm), collapse = ", "),
    Factor = paste(unique(Factor), collapse = ", "),
    term = paste(unique(term), collapse = ", ")
  )
dim(goterms_shared)
## [1] 730   4
goterms_shared_full <- goterms_shared %>%
  left_join(dplyr::select(all_terms,GOterm, Factor, over_represented_pvalue), by = "GOterm") %>% #select 3 columns GOterm, Factor, over_represented_pvalue from all_terms and left join by GOterm, turns this dataframe from 730 rows back to the 734 in all_terms
  mutate(pvalue_Up = ifelse(Factor.y == "Up", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Up factor
  mutate(pvalue_Down = ifelse(Factor.y == "Down", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Down factor
  dplyr::select(-c("over_represented_pvalue", "Factor.y")) %>% #remove the unique columns so that we can collapse the dataframe
  group_by(GOterm, ParentTerm, Factor.x, term) %>% #group by the repeated columns for the non-unique GO terms
  dplyr::summarize(pvalue_Up = na.omit(pvalue_Up)[1], #carry over the p-value for the term in the up direction, by taking the first non-NA value.
            pvalue_Down = na.omit(pvalue_Down)[1]) %>% #carry over the p-value for the term in the down direction, by taking the first non-NA value.
  rename(Factor.x = "Factor") #rename column 
## `summarise()` has grouped output by 'GOterm', 'ParentTerm', 'Factor.x'. You can
## override using the `.groups` argument.
write.csv(goterms_shared_full, "../../output/WGCNA/GO_analysis/Merged_GOterms_factor_ParentTerm.csv")

plots

goterms_shared_full <- goterms_shared_full %>% group_by(ParentTerm) %>% mutate("N_in_Parent" = n()) %>% ungroup()

goterms_up <- goterms_shared_full %>% dplyr::filter(Factor=="Up")
goterms_down <- goterms_shared_full %>% dplyr::filter(Factor=="Down")
goterms_shared_only <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up")

nrow(goterms_up)
## [1] 504
nrow(goterms_down)
## [1] 222
nrow(goterms_shared_only)
## [1] 4
nrow(goterms_up) + nrow(goterms_down) + nrow(goterms_shared_only) == nrow(goterms_shared_full)
## [1] TRUE
#how many parent terms in each
length(unique(goterms_up$ParentTerm))
## [1] 77
length(unique(goterms_down$ParentTerm))
## [1] 42
length(unique(goterms_shared_only$ParentTerm))
## [1] 2
freq_fig_up_pval <- ggplot(goterms_shared_only, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Up)) +
  geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Up,),linewidth=.25) +
  geom_point(size=1.5, alpha=1, colour = "red") +
  geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
  coord_flip() +
  theme_classic() +
  ylim(0, 0.04) + 
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        axis.text.y = element_text(vjust=0.5, size=5), 
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  guides(color = FALSE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
freq_fig_down_pval <- ggplot(goterms_shared_only, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Down)) +
  geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Down),linewidth=.25) +
  geom_point(size=1.5, alpha=1, colour = "blue") +
  geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
  coord_flip() +
  theme_classic() +
  ylim(0, 0.04) + 
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        axis.text.y = element_text(vjust=0.5, size=5), 
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) 

compare_figs<-cowplot::plot_grid(freq_fig_up_pval, freq_fig_down_pval, nrow=1, align="v")
compare_figs

ggsave(filename="../../output/WGCNA/GO_analysis/Up_Down_Shared_ParentTerms.pdf", plot=compare_figs, dpi=300, height=3, units="in", limitsize=FALSE)
## Saving 7 x 3 in image
freq_fig_up_pval <- ggplot(goterms_up, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Up)) +
  geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Up,),linewidth=.25) +
  geom_point(size=1.5, alpha=1, colour = "red") +
  geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
  coord_flip() +
  theme_classic() +
  ylim(0, 0.05) + 
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        axis.text.y = element_text(vjust=0.5, size=5), 
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  guides(color = FALSE)

freq_fig_down_pval <- ggplot(goterms_down, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Down)) +
  geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Down),linewidth=.25) +
  geom_point(size=1.5, alpha=1, colour = "blue") +
  geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
  coord_flip() +
  theme_classic() +
  ylim(0, 0.05) + 
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        axis.text.y = element_text(vjust=0.5, size=5), 
        axis.title.x = element_blank(),
        axis.title.y = element_blank())

compare_figs<-cowplot::plot_grid(freq_fig_up_pval, freq_fig_down_pval, nrow=1, align="v")
compare_figs

ggsave(filename="../../output/WGCNA/GO_analysis/Up_Down_Unique_ParentTerms.pdf", plot=compare_figs, dpi=300, height=10, units="in", limitsize=FALSE)
## Saving 7 x 10 in image

GOSlim

I downloaded the GOSlim terms list from the following website on 4/5/2024: http://www.informatics.jax.org/gotools/data/input/map2MGIslim.txt

I did so by going to this link and right clicking “here” at “Tab-delimited file mapping GO_ids to MGI GO_Slim category available for download here” and saying “Save link as…” , and I saved it as “map2MGIslim.txt”. I then converted this to CSV and uploaded to this github repository here.

Add slim terms to dataset

GOslim <- read.csv("../../data/map2MGIslim.csv") %>% dplyr::select(-c(term,aspect))
goterms_shared_full_slim <- goterms_shared_full %>%
  inner_join(GOslim, by = c("GOterm" = "GO_id"))

goterms_up_slim <- goterms_shared_full_slim %>% dplyr::filter(Factor=="Up" | Factor=="Down, Up")

goterms_up_full_slim_plot <- goterms_up_slim %>%
      ggplot(aes(x=pvalue_Up, 
               y=term)) +
        geom_point(size = 0.5) +
        expand_limits(x=0) +
        facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
      labs(x="Over-represented p-value", y="") +
      theme_bw() + 
      theme(
        strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
        axis.text.y = element_text(size = 1, colour = "black"),
        axis.title.x = element_text(size = 5, colour = "black"),
      ); goterms_up_full_slim_plot

ggsave("../../output/WGCNA/GO_analysis/Calc_Up_GOSlim.pdf", goterms_up_full_slim_plot, width = 5, height = 35)

goterms_up_full_slim_plot <- goterms_up_slim %>%
      ggplot(aes(x=log10(pvalue_Up), 
               y=term)) +
        geom_point(size = 0.5) +
        expand_limits(x=0) +
        facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
      labs(x="Log 10(Over-represented p-value)", y="") +
      theme_bw() + 
      theme(
        strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
        axis.text.y = element_text(size = 1, colour = "black"),
        axis.title.x = element_text(size = 5, colour = "black"),
      ); goterms_up_full_slim_plot

ggsave("../../output/WGCNA/GO_analysis/Calc_Up_GOSlim_Log.pdf", goterms_up_full_slim_plot, width = 5, height = 35)
goterms_down_slim <- goterms_shared_full_slim %>% dplyr::filter(Factor=="Down" | Factor=="Down, Up")

goterms_down_full_slim_plot <- goterms_down_slim %>%
      ggplot(aes(x=pvalue_Down, 
               y=term)) +
        geom_point(size = 0.5) +
        expand_limits(x=0) +
        facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
      labs(x="Over-represented p-value", y="") +
      theme_bw() + 
      theme(
        strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
        axis.text = element_text(size = 1, colour = "black"),
        axis.title.x = element_text(size = 5, colour = "black"),
        legend.text=element_text(size = 5),
        legend.title = element_text(size = 5)
      ); goterms_down_full_slim_plot

ggsave("../../output/WGCNA/GO_analysis/Calc_Down_GOSlim.pdf", goterms_down_full_slim_plot, width = 5, height = 35)

goterms_down_full_slim_plot <- goterms_down_slim %>%
      ggplot(aes(x=log10(pvalue_Down), 
               y=term)) +
        geom_point(size = 0.5) +
        expand_limits(x=0) +
        facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
      labs(x="Log 10(Over-represented p-value)", y="") +
      theme_bw() + 
      theme(
        strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
        axis.text = element_text(size = 1, colour = "black"),
        axis.title.x = element_text(size = 5, colour = "black"),
        legend.text=element_text(size = 5),
        legend.title = element_text(size = 5)
      ); goterms_down_full_slim_plot

ggsave("../../output/WGCNA/GO_analysis/Calc_Down_GOSlim_Log.pdf", goterms_down_full_slim_plot, width = 5, height = 35)

For each parent term in this list, how many are “Up” by calcification, “Down”, or both

SharedGOterms = # of GO terms within the parent term that are in the list for “Down”, “Up”, or “Down, Up”

result_parent_unique <- goterms_shared %>%
  group_by(ParentTerm,Factor) %>%
  dplyr::summarise(SharedGOterms = n_distinct(GOterm)) %>%
  arrange(-SharedGOterms)
## `summarise()` has grouped output by 'ParentTerm'. You can override using the
## `.groups` argument.
parent_filtered_up <- result_parent_unique %>%
  dplyr::filter(Factor=="Up" | Factor=="Down, Up")
 #dplyr::filter(SharedGOterms>=5) 
parent_filtered_down <- result_parent_unique %>%
  dplyr::filter(Factor=="Down" | Factor=="Down, Up")
 #dplyr::filter(SharedGOterms>=5) 

Count number of GOterms by ParentTerm for the upregulation of calcification

result_up <- cal_up_terms %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Number.of.terms = n_distinct(term))%>%
  mutate(Calcification.direction = "Up")

head(result_up)
## # A tibble: 6 × 3
##   ParentTerm                              Number.of.terms Calcification.direct…¹
##   <chr>                                             <int> <chr>                 
## 1 ATP biosynthetic process                              5 Up                    
## 2 Arp2/3 complex-mediated actin nucleati…              16 Up                    
## 3 DNA dealkylation involved in DNA repair               6 Up                    
## 4 Golgi organization                                    2 Up                    
## 5 IRE1-mediated unfolded protein response              16 Up                    
## 6 P granule assembly                                    1 Up                    
## # ℹ abbreviated name: ¹​Calcification.direction
dim(result_up)
## [1] 78  3
merged_up <- parent_filtered_up %>%
  tidyr::separate_rows(ParentTerm, sep = ", ") %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Sum_of_SharedGOterms = sum(SharedGOterms, na.rm = TRUE)) %>%
  left_join(result_up, by = "ParentTerm") #join with result from above to get the Number.of.Terms column

merged_up_clean <- na.omit(merged_up)
head(merged_up_clean)
## # A tibble: 6 × 4
##   ParentTerm         Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##   <chr>                             <int>           <int> <chr>                 
## 1 ATP biosynthetic …                    5               5 Up                    
## 2 Arp2/3 complex-me…                   16              16 Up                    
## 3 DNA dealkylation …                    6               6 Up                    
## 4 Golgi organization                    2               2 Up                    
## 5 IRE1-mediated unf…                   16              16 Up                    
## 6 P granule assembly                    1               1 Up                    
## # ℹ abbreviated name: ¹​Calcification.direction
dim(merged_up_clean)
## [1] 78  4

Count number of GOterms by ParentTerm for the downregulation of calcification

result_down <- cal_down_terms %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Number.of.terms = n_distinct(term))%>%
  mutate(Calcification.direction = "Down")

dim(result_down)
## [1] 43  3
head(result_down)
## # A tibble: 6 × 3
##   ParentTerm                              Number.of.terms Calcification.direct…¹
##   <chr>                                             <int> <chr>                 
## 1 ATP biosynthetic process                             24 Down                  
## 2 Arp2/3 complex-mediated actin nucleati…               2 Down                  
## 3 IRE1-mediated unfolded protein response               2 Down                  
## 4 NADH regeneration                                     1 Down                  
## 5 P granule assembly                                    1 Down                  
## 6 biological process involved in intersp…               1 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction
merged_down <- parent_filtered_down %>%
  tidyr::separate_rows(ParentTerm, sep = ", ") %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Sum_of_SharedGOterms = sum(SharedGOterms, na.rm = TRUE)) %>%
  left_join(result_down, by = "ParentTerm") 

merged_down_clean<- na.omit(merged_down)
head(merged_down_clean)
## # A tibble: 6 × 4
##   ParentTerm         Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##   <chr>                             <int>           <int> <chr>                 
## 1 ATP biosynthetic …                   24              24 Down                  
## 2 Arp2/3 complex-me…                    2               2 Down                  
## 3 IRE1-mediated unf…                    2               2 Down                  
## 4 NADH regeneration                     1               1 Down                  
## 5 P granule assembly                    1               1 Down                  
## 6 biological proces…                    1               1 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction

Frequency >10 up

cal_freq_terms_filtered_up <- merged_up_clean %>%
  filter(Number.of.terms>=10) %>%
  filter(Calcification.direction=="Up")
dim(cal_freq_terms_filtered_up)
## [1] 18  4

Figure

#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms), group=1))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,30))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up.pdf", plot=freq_fig_up, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image

Frequency >10 down

cal_freq_terms_filtered_down <- merged_down_clean %>%
  filter(Number.of.terms>=10)
  #filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 7 × 4
##   ParentTerm         Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##   <chr>                             <int>           <int> <chr>                 
## 1 ATP biosynthetic …                   24              24 Down                  
## 2 biosynthetic proc…                   15              15 Down                  
## 3 metabolic process                    13              13 Down                  
## 4 organic acid meta…                   12              12 Down                  
## 5 peptidyl-serine p…                   13              13 Down                  
## 6 proteasome-mediat…                   24              24 Down                  
## 7 protein transport                    13              13 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  #geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,30))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  #scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down.pdf", plot=freq_fig_down, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs

Frequency >20 up

cal_freq_terms_filtered_up <- merged_up_clean %>%
  filter(Number.of.terms>=20) %>%
  filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up
## # A tibble: 4 × 4
##   ParentTerm         Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##   <chr>                             <int>           <int> <chr>                 
## 1 positive regulati…                   24              24 Up                    
## 2 regulation of cal…                   22              22 Up                    
## 3 regulation of cat…                   21              21 Up                    
## 4 regulation of mac…                   20              20 Up                    
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms),group=1))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,30))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_gr20.pdf", plot=freq_fig_up, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image

Frequency >20 down

cal_freq_terms_filtered_down <- merged_down_clean %>%
  filter(Number.of.terms>=20)
  #filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 2 × 4
##   ParentTerm         Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##   <chr>                             <int>           <int> <chr>                 
## 1 ATP biosynthetic …                   24              24 Down                  
## 2 proteasome-mediat…                   24              24 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  #geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,30))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  #scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_gr20.pdf", plot=freq_fig_down, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs

All terms up

cal_freq_terms_filtered_up_all <- merged_up_clean %>%
  filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up_all
## # A tibble: 78 × 4
##    ParentTerm        Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##    <chr>                            <int>           <int> <chr>                 
##  1 ATP biosynthetic…                    5               5 Up                    
##  2 Arp2/3 complex-m…                   16              16 Up                    
##  3 DNA dealkylation…                    6               6 Up                    
##  4 Golgi organizati…                    2               2 Up                    
##  5 IRE1-mediated un…                   16              16 Up                    
##  6 P granule assemb…                    1               1 Up                    
##  7 abscission                           1               1 Up                    
##  8 biological proce…                    3               3 Up                    
##  9 cell communicati…                    4               4 Up                    
## 10 cell differentia…                    8               8 Up                    
## # ℹ 68 more rows
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up_all, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms),group=1))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,30))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_ALL.pdf", plot=freq_fig_up, dpi=300, height=17, units="in", limitsize=FALSE)
## Saving 7 x 17 in image

All terms down

cal_freq_terms_filtered_down_all <- merged_down_clean %>%
  filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down_all
## # A tibble: 42 × 4
##    ParentTerm        Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##    <chr>                            <int>           <int> <chr>                 
##  1 ATP biosynthetic…                   24              24 Down                  
##  2 Arp2/3 complex-m…                    2               2 Down                  
##  3 IRE1-mediated un…                    2               2 Down                  
##  4 NADH regeneration                    1               1 Down                  
##  5 P granule assemb…                    1               1 Down                  
##  6 biological proce…                    1               1 Down                  
##  7 biosynthetic pro…                   15              15 Down                  
##  8 cell communicati…                    3               3 Down                  
##  9 cellular aldehyd…                    1               1 Down                  
## 10 cellular macromo…                    2               2 Down                  
## # ℹ 32 more rows
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down_all, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  #geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,30))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  #scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_ALL.pdf", plot=freq_fig_down, dpi=300, height=17, units="in", limitsize=FALSE)
## Saving 7 x 17 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, ncol=2, align="h")
compare_figs